* Session setup for the extract-building steps that follow.
* Disable output paging so the script runs start to finish without waiting for a keypress.
set more off
* Set the maximum number of variables Stata will allow in memory to 20,000.
set maxvar 20000

*Define directory with input data
* Every use/save below refers to a bare file name, so all inputs are read from,
* and all outputs are written to, this working directory.
cd "Q:"

*Create 2004-2018 SS weights extract
* Output: hrssswtsdata_2018, one row per hhid/pn/waveyr, keeping rows that carry
* at least one of the two weights (sswgts_cur, sswgts_ev).
use "SSWGTSJQ_R.dta", clear

rename *, lower

* Wide -> long: the wave letter embedded in each weight's name becomes the string variable wave.
reshape long sswgts@_cur sswgts@_ev, i(hhid pn) j(wave) string

* Wave letter -> calendar year lookup, held as two parallel lists (letter i is not used).
local wave_letters a b c d e f g h j k l m n o p q
local wave_years   1992 1993 1994 1995 1996 1998 2000 2002 2004 2006 2008 2010 2012 2014 2016 2018
local n_waves : word count `wave_letters'

gen waveyr=.
forvalues k=1/`n_waves' {
	local letter : word `k' of `wave_letters'
	local year   : word `k' of `wave_years'
	replace waveyr=`year' if wave=="`letter'"
}

* Rows whose wave letter is not in the lookup never receive a year; discard them.
drop if waveyr==.
sort hhid pn waveyr
drop wave

* Keep a row when at least one weight is not system missing
* (same rows as dropping those where both weights equal .).
keep if sswgts_cur!=. | sswgts_ev!=.

save hrssswtsdata_2018, replace

*Clean 2016 DER data
* Reads the employer-level detail earnings file, converts the special extended-missing
* codes in SEI_MED and W2BOX5 to numeric stand-ins, and sums to one row per
* hhid/pn/year. Output: hrsderdata_2016.
use "xyrdetern2016scr.dta", clear
rename *, upper
rename SCRMEIN ErID

* YEAR must be numeric: it is copied into iwyear/lcy and becomes waveyr below.
destring YEAR, replace
keep HHID PN YEAR ErID SEI_MED W2BOX5

*Error in 2016: some records have all .a's - set them to missing
foreach v in SEI_MED W2BOX5 {
	replace `v'=. 		if `v'==.a
}

*Flag topcoded records
* This must run before the recode loop below, which overwrites .x/.y/.z with numbers.
gen se_topcoded=0
replace se_topcoded=1 if SEI_MED==.x | SEI_MED==.y | SEI_MED==.z
gen non_se_topcoded=0
replace non_se_topcoded=1 if W2BOX5==.x | W2BOX5==.y | W2BOX5==.z

* Replace the special codes with numeric values: the topcodes .x/.y/.z become
* 250000/300000/500000, and .n/.m/.c/.b become 1/-1/-250000/-300000.
foreach v in SEI_MED W2BOX5{
	replace `v'=250000 if `v'==.x
	replace `v'=300000 if `v'==.y
	replace `v'=500000 if `v'==.z
	replace `v'=1 		if `v'==.n
	replace `v'=-1 		if `v'==.m
	replace `v'=-250000 if `v'==.c
	replace `v'=-300000 if `v'==.b
	* The .a recode stays disabled; .a was already set to system missing above.
	*replace `v'=-500000 if `v'==.a
}

* Keep employer records where both amounts are not system missing.
* NOTE(review): `!=.` excludes only the system missing value. Any extended-missing
* code not recoded above would pass this filter and then count as 0 in the sums
* below - confirm that no other codes occur in this file.
keep if W2BOX5!=. & SEI_MED!=.

sort HHID PN YEAR ErID

* Sum across employers within person-year. The two flags become counts of
* topcoded employer records rather than 0/1 indicators.
drop ErID
collapse (sum) W2BOX5 SEI_MED se_topcoded non_se_topcoded, by(HHID PN YEAR)
rename (SEI_MED W2BOX5) (tot_se_earn non_se_earn)

gen iwyear=YEAR
gen lcy=YEAR
* collapse (sum) returns its totals as doubles, while gen defaults to float.
* Create the earnings copies as double so they are exact copies of the totals
* and are not silently rounded to float precision.
gen double tot_se_earn_lcy=tot_se_earn
gen double non_se_earn_lcy=non_se_earn
gen se_topcoded_lcy=se_topcoded
gen non_se_topcoded_lcy=non_se_topcoded

rename HHID PN, lower
rename YEAR waveyr

save hrsderdata_2016, replace

*Clean 2016 SER data
* Reshapes the summary earnings file to one row per hhid/pn/year and keeps rows
* where neither SE nor EARN is system missing. Output: hrsserdata_2016.
use "xyrsumern.dta", clear

* Normalise names: everything to upper case, then the two identifiers back to lower case.
rename *, upper
rename HHID PN, lower
keep hhid pn SE* EARN*

* Wide -> long: the numeric suffix on the SE/EARN columns becomes the variable year.
reshape long SE EARN, i(hhid pn) j(year)

* Special codes: .n is replaced by 1, .m by system missing.
foreach earnvar in SE EARN {
	replace `earnvar'=1 if `earnvar'==.n
	replace `earnvar'=. if `earnvar'==.m
}

* Discard rows where either amount is system missing
* (same rows as keeping those where both differ from .).
drop if EARN==. | SE==.

* The year is stored three times (waveyr, iwyear, lcy) and each amount gets an _lcy copy.
* NOTE(review): these copies take gen's default float type - confirm SE/EARN never hold
* values that a float cannot represent exactly.
rename year waveyr
gen iwyear=waveyr
gen lcy=waveyr
gen SE_lcy=SE
gen EARN_lcy=EARN

save hrsserdata_2016, replace